*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file 
	* Generates the final list of MSAs that will be used in all CBSA loops (excluding Puerto Rico)
	* Processes MSA grid files so that all MSA files contain basic information such as identifier, coordintes, etc
	* Appends the data to one large file containing all MSAs.

* Generate a list of CBSA IDs over which to loop 
	qui import delimited "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.csv", clear
	qui drop if inlist(cbsafp, 32420, 41900, 41980, 38660, 10380, 11640, 25020)			// Drop Puerto Rico CBSAs
	qui save "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.dta", replace 			// Create a Stata version for later merge of names

* Generate temporary working directory
		capture mkdir $temp
		capture mkdir "$temp/GridLegend"	
	
* Loop to convert grid files into Stata
		qui tab cbsafp
		local Ncbsafp = r(N) // Total number of cities in list
		local count = 1
	levelsof  cbsafp, local(USMETROIDS)		
	 
	// Generate the locals containing all CBSA IDs
	foreach USmid of local USMETROIDS { // local USMETROIDS{ numlist 26420
		di "...Working on CBSA `USmid', number `count' of `Ncbsafp': converting grids to Stata..."
		qui  import delimited "$data_USMETROS/GIS Data/US METRO GRIDS/GRID_`USmid'_final.csv", clear 
		 duplicates report cellid
		qui gen dup = r(unique_value) <r(N) // flagging if there are duplicates
		qui duplicates drop cellid, force   // drop duplicates as a safety net
		qui save "$temp/GridLegend/GRID_`USmid'_final", replace	// Save final grids
		local count = `count'+1
	}
	 
* Append all city grids into one major grid
	clear
	display "...compiling employment type `EmpType' metro-grid data..."	
	qui foreach USmid of local USMETROIDS{
		append using "$temp/GridLegend/GRID_`USmid'_final"
	}
	duplicates report cellid		// another safety net in case any duplicates persist
	duplicates drop cellid, force	// another safety net in case any duplicates persist
	
* Mege metro name
	 merge m:1 cbsafp using "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.dta", keepusing(name)
	 	qui drop if _m == 2
		qui drop _m
		qui ren name metro_name	
	
* Finalize data set
	gen cell_id = cellid_unique		// Gen grid id and cooridnates from information
	qui split cell_id , p(_)	
	qui ren cell_id2 grid_row
	qui ren cell_id3 grid_col
	* qui ren cell_id1 cbsafp
	qui destring cbsafp, replace
	qui destring cell_id1, replace force
	qui destring grid_row, replace force
	qui destring grid_col, replace force
	gen grid_x = grid_row *250
	gen grid_y = grid_col*250
	qui drop if grid_row == 0 // redundant entry
	* some other potentially useful IDs
	egen double square_id = group(cellid_unique)	// Generate unique numeric grid id
	egen metro_id = group(cbsafp)					// Generate unique numeric metro_id
	qui ren cp_x lon
	qui ren cp_y lat
	qui keep cell_id dup cellid_unique square_id metro_id cbsafp area_g developable lon lat metro_name grid_x grid_y min_x min_y max_x max_y
	qui save "$dataoutput/metro-grid-legend.dta", replace
	
* Finalize individual grid data sets
	local count = 1
	foreach USmid of local USMETROIDS {
	u "$temp/GridLegend/GRID_`USmid'_final", clear
		di "...Working on CBSA `USmid', number `count' of `Ncbsafp': Finalizing grids..."
		qui merge 1:1 cellid_unique using "$dataoutput/metro-grid-legend.dta", keepusing(cell_id square_id metro_id lon lat metro_name grid_x grid_y min_x min_y max_x max_y)
		qui drop if _m == 2
		qui drop _m
		qui  qui drop cp_x cp_y
		qui save "$temp/GridLegend/GRID_`USmid'_final", replace
		local count = `count'+1
		}
	
* End of do file
